Homepage
The formatted source code for this file is available here, and a raw version here.
Previous work by Youngser Park can be found here.
Here we read in the data and select a random half of it for exploration.
## Read the full synapse feature matrix (144 columns = 24 channels x 6 features).
featFull <- fread("../data/synapsinR_7thA.tif.Pivots.txt.2011Features.txt",showProgress=FALSE)
### Fix the RNG seed so the random half-split is reproducible, then sample
### half of the row indices; `feat` (half1) is explored below, half2 is held out.
set.seed(2^10)
half1 <- sample(nrow(featFull), nrow(featFull) / 2)
half2 <- setdiff(seq_len(nrow(featFull)), half1)
feat <- featFull[half1, ]
dim(feat)  # [1] 559649 144
## Names of the 24 antibody channels, in column order of the feature matrix.
channel <- c(
  "Synap_1", "Synap_2", "VGlut1_t1", "VGlut1_t2", "VGlut2", "Vglut3",
  "psd", "glur2", "nmdar1", "nr2b", "gad", "VGAT",
  "PV", "Gephyr", "GABAR1", "GABABR", "CR1", "5HT1A",
  "NOS", "TH", "VACht", "Synapo", "tubuli", "DAPI"
)
## Fine-grained synapse type for each channel (pre/post x ex/in, plus
## "in.pre.small", "other", and "none" for the non-synaptic markers).
channel.type <- c(
  "ex.pre", "ex.pre", "ex.pre", "ex.pre", "ex.pre", "in.pre.small",
  "ex.post", "ex.post", "ex.post", "ex.post", "in.pre", "in.pre",
  "in.pre", "in.post", "in.post", "in.post", "in.pre.small", "other",
  "ex.post", "other", "other", "ex.post", "none", "none"
)
## Coarser typing: the small/none categories are folded into "other".
channel.type2 <- c(
  "ex.pre", "ex.pre", "ex.pre", "ex.pre", "ex.pre", "other",
  "ex.post", "ex.post", "ex.post", "ex.post", "in.pre", "in.pre",
  "in.pre", "in.post", "in.post", "in.post", "other", "other",
  "ex.post", "other", "other", "ex.post", "other", "other"
)
## Total number of channels (24).
nchannel <- length(channel)
nfeat <- ncol(feat) / nchannel
## Creating factor variables for channel type, with a fixed level order that
## groups excitatory, inhibitory, and other channels together.
type.levels <- c("ex.pre", "ex.post", "in.pre", "in.post",
                 "in.pre.small", "other", "none")
ffchannel <- factor(channel.type, levels = type.levels)
## Integer codes of the same factor (1 = ex.pre, ..., 7 = none); derived from
## ffchannel rather than re-building the factor from scratch.
fchannel <- as.numeric(ffchannel)
## Channel ordering that puts channels of the same type next to each other.
ford <- order(fchannel)
## Setting up colors for channel types (one color per level of fchannel).
Syncol <- c("#197300","#5ed155","#660000","#cc0000","#ff9933","#0000cd","#ffd700")
Syncol3 <- c("#197300","#197300","#cc0000","#cc0000","#0000cd","#0000cd","#0000cd")
ccol <- Syncol[fchannel]    # per-channel color, 7-level palette
ccol3 <- Syncol3[fchannel]  # per-channel color, collapsed ex/in/other palette

## Coarse excitatory / inhibitory / other grouping of the 24 channels
## (levels sort alphabetically: ex < in < other).
exType <- factor(c(rep("ex", 11), rep("in", 6), rep("other", 7)), ordered = TRUE)
## Map the three exType levels directly to display colors.
exCol <- c("#197300", "#990000", "#0000cd")[as.integer(exType)]

## Feature column names: each channel contributes six features F0..F5,
## laid out channel-major (Synap_1F0 ... Synap_1F5, Synap_2F0, ...).
fname <- paste0(rep(channel, each = 6), "F", 0:5)
names(feat) <- fname
fcol <- rep(ccol, each = 6)  # one color per feature column

## Palettes used by the heatmap/correlation plots below.
mycol <- colorpanel(100, "purple", "black", "green")  # gplots
mycol2 <- matlab.like(nchannel)                       # colorRamps
mycol3 <- colorpanel(100, "blue","white","red")f <- lapply(1:6,function(x){seq(x,ncol(feat),by=nfeat)})
## Split the data by feature type: featF[[k]] is the 24-channel data for
## feature F(k-1); featF0 is the integrated-brightness features (F0).
featF <- lapply(f, function(x) subset(feat, select = x))
featF0 <- featF[[1]]

## Min-max scale each F0 column to [0, 1], then multiply by 1e3 -> [0, 1000].
## NOTE(review): apply() coerces the data.table to a matrix here; all columns
## are numeric so no type is lost.
f01e3 <- 1e3 * data.table(apply(X = featF0, 2,
                                function(x) (x - min(x)) / (max(x) - min(x))))
fs <- f01e3

### Taking log_10 of data + 1, then z-scoring each column (center and scale).
log1f <- log10(featF0 + 1)
slog1f <- data.table(scale(log1f, center = TRUE, scale = TRUE))
We now have the following data sets:
featF0: The feature vector looking only at the integrated brightness features.fs: The feature vector scaled between \([0,1000]\).logf1: The feature vector, plus one, then \(log_{10}\) is applied.slog1f: The feature vector, plus one, \(log_{10}\), then scaled by subtracting the mean and dividing by the sample standard deviation.corrf <- lapply(lapply(featF, cor), function(x) x[ford, ford])
## One correlation plot per feature type; tick labels are colored by
## channel type.
titles <- paste('Correlation Matrix of', paste0("F", 0:5))
par(mfrow = c(3, 2))
for (i in seq_along(corrf)) {
  corrplot(corrf[[i]], method = "color", tl.col = ccol[ford],
           tl.cex = 0.8, mar = c(1, 0, 1, 1.5))
  title(titles[i])
}

## Block ordering over all 144 feature columns (6 per channel), grouped by
## channel type.
bford <- order(rep(fchannel, each = 6))
## Flatten the per-feature-type column index sets into a single index vector.
nord <- unlist(f, use.names = FALSE)
## One color per feature column: each channel's color repeated for its
## six features (same construction as fcol above).
cr <- rep(ccol, each = 6)
## Full 144x144 correlation matrix in block (channel-type) order.
corrfB <- cor(feat)[bford, bford]
## Full 144x144 correlation plot in block order.
corrplot(corrfB, method = "color", tl.col = cr[bford], tl.cex = 0.75)

#' Pairwise distance-correlation t-tests between the columns of `x`.
#'
#' Subsamples at most 1000 rows (fixed seed, so the subsample is
#' reproducible), then runs energy::dcor.ttest on every pair of columns,
#' in parallel via foreach/%dopar%.
#'
#' NOTE(review): calls set.seed(), which mutates the global RNG state.
#'
#' @param x data.frame-like object with one numeric column per channel.
#' @return list(ms, mp): symmetric matrices of test statistics and p-values,
#'   with row/column names taken from colnames(x).
computeDcor <- function(x) {
  set.seed(317)
  n <- nrow(x)
  sam1 <- sample(n, min(1e3, n))
  tmp <- as.data.frame(x[sam1, ])
  p <- ncol(tmp)  # generalized from the hard-coded 24 channels
  combcols <- t(combn(p, 2))
  dc <- foreach(i = seq_len(nrow(combcols))) %dopar% {
    set.seed(331 * i)  # per-pair seed so each worker is reproducible
    dcor.ttest(x = tmp[, combcols[i, 1]], y = tmp[, combcols[i, 2]])
  }
  ## Fill both triangles of the statistic and p-value matrices.
  ms <- matrix(0, p, p)
  mp <- matrix(0, p, p)
  for (i in seq_along(dc)) {
    ms[combcols[i, 1], combcols[i, 2]] <- dc[[i]]$statistic
    ms[combcols[i, 2], combcols[i, 1]] <- dc[[i]]$statistic
    mp[combcols[i, 1], combcols[i, 2]] <- dc[[i]]$p.value
    mp[combcols[i, 2], combcols[i, 1]] <- dc[[i]]$p.value
  }
  rownames(ms) <- colnames(ms) <- colnames(x)
  rownames(mp) <- colnames(mp) <- colnames(x)
  ## A variable is trivially dependent on itself: statistic 0 by convention,
  ## p-value 1 so the diagonal is never flagged significant.
  diag(ms) <- 0
  diag(mp) <- 1
  list(ms, mp)
}
## Run the distance-correlation tests for each of the six feature types.
mdcor <- lapply(featF, computeDcor)

## Color ramp generators (grDevices).
## NOTE(review): cl5/gr5/bl5 appear unused in the visible code — kept in case
## a later chunk references them.
cl5 <- colorRampPalette(c("white", "blue"))
gr5 <- colorRampPalette(c("darkgreen", "white", "white"))
bl5 <- colorRampPalette(c("blue", "red"))
sTitle <- paste("dcor.ttest statistic", paste0("F", 0:5))
pTitle <- paste("dcor.ttest log10(p-value)", paste0("F", 0:5))
## Plot the dcor statistic (left column) and log10 p-value (right column)
## matrices for each feature type; colored rectangles outline the
## channel-type blocks.
par(mfcol = c(6, 2), oma = c(1, 1, 1, 1))
for (i in seq_along(mdcor)) {
  corrplot(mdcor[[i]][[1]][ford, ford], is.corr = FALSE, method = "color",
           tl.col = ccol[ford], tl.cex = 0.8, mar = c(1, 0, 1, 1.5))
  title(sTitle[i])
  corrRect(as.numeric(table(fchannel)), col = Syncol, lwd = 4)
}
for (i in seq_along(mdcor)) {
  ## Add machine epsilon before log10 so zero p-values do not become -Inf;
  ## cells with p > sig.level are marked with an 'x'.
  corrplot((log10(mdcor[[i]][[2]][ford, ford] + .Machine$double.eps)),
           is.corr = FALSE, method = "color", tl.col = ccol[ford],
           tl.cex = 0.8, mar = c(1, 0, 1, 1.5),
           p.mat = mdcor[[i]][[2]][ford, ford],
           sig.level = 0.01, pch = 'x', pch.cex = 0.5)
  title(pTitle[i])
  corrRect(as.numeric(table(fchannel)), col = Syncol, lwd = 4)
}
The X’s in the above right figure denote a p-value greater than 0.01.
We will run PCA on the untransformed correlation matrix so the data can be viewed in two dimensions. The colors correspond to synapse type.
## PCA on each per-feature-type correlation matrix, plus pairs plots of the
## first three principal components.
pcaL <- lapply(corrf, prcomp, center = TRUE, scale = TRUE)
titlepca <- paste("PCA on ", paste0("cor(F", 0:5, ')'))
for (i in seq_along(pcaL)) {
  pairs(pcaL[[i]]$x[, 1:3], col = ccol3[ford], pch = 20, cex = 2,
        main = titlepca[i])
}

## PCA on the full 144x144 correlation matrix.
pcaB <- prcomp(corrfB, center = TRUE, scale = TRUE)
pairs(pcaB$x[, 1:3], col = cr[bford], pch = 20, cex = 2)
plot(pcaB$x[, 1:3], col = cr[bford], pch = 20, cex = 2)
text(pcaB$x[, 1:2], labels = rownames(pcaB$x), pos = 4, col = cr[bford])

## NOTE(review): pcaB is overwritten with its score matrix here; the rgl calls
## below (and any later chunk) see the scores, not the prcomp object.
pcaB <- pcaB$x
rgl::plot3d(pcaB[, 1], pcaB[, 2], pcaB[, 3], type = 's',
            col = cr[bford], size = 1)
rgl::rgl.texts(pcaB[, 1], pcaB[, 2], pcaB[, 3], abbreviate(rownames(pcaB)),
               col = cr[bford], adj = c(0, 2))
subid <- currentSubscene3d()
rglwidget(elementId = "rgl-pcaB", width = 720, height = 720)

## First-elbow dimension estimate (Zhu-Ghodsi) for each feature-type PCA.
el2 <- lapply(pcaL, function(x) getElbows(x$sdev, plot = FALSE))
## Keep principal components up to and including the first Zhu-Ghodsi elbow
## for each feature type.
del2 <- lapply(pcaL, function(x) {
  elb <- getElbows(x$sdev, plot = FALSE)[1]
  x$x[, 1:elb]
})
## Attach the ex/in/other labels and fit LDA and random-forest classifiers.
d1 <- lapply(del2, function(x) data.frame(type = exType, x))
lda.fit <- lapply(d1, function(x) lda(type ~ ., data = x))
rf.fit <- lapply(d1, function(x) randomForest(type ~ ., data = x))
## Element 3 of a randomForest fit — presumably the `predicted` (out-of-bag)
## component; verify against the randomForest object layout.
rf.pred <- lapply(rf.fit, '[[', 3)
LDA was run using only the principal components up to and including the first Zhu–Ghodsi (ZG) elbow from the untransformed correlation matrix.
titlesvor <- paste("LDA decision boundaries for", paste0("F", 0:5))
## Per-feature-type LDA group means, used as Voronoi generator points.
## NOTE(review): element 3 of an lda fit is assumed to be `means` — verify
## against the MASS::lda object layout.
voronoidf <- lapply(lapply(lda.fit, '[[', 3), data.frame)
#voronoidf <- data.frame(x=lda.fit$means[,1],y=lda.fit$means[,2])
#This creates the voronoi line segments
par(mfrow = c(3, 2))
for (i in seq_along(d1)) {
  plot(d1[[i]][, 2:3], col = ccol3[ford], pch = 20, cex = 1.5)
  title(titlesvor[i])
  ## Labels go to the right of each point, except near the right margin
  ## where they flip to the left so they stay inside the plot.
  text(d1[[i]][, 2:3], labels = rownames(d1[[i]]),
       pos = ifelse(d1[[i]][, 2] < max(d1[[i]][, 2] - 0.5), 4, 2),
       col = ccol3[ford], cex = 1.2)
  ## Overlay the Voronoi tessellation of the LDA class means.
  deldir(x = voronoidf[[i]][, 1], y = voronoidf[[i]][, 2],
         rw = c(-15, 15, -15, 15), plotit = TRUE, add = TRUE, wl = 'te')
  text(voronoidf[[i]], labels = rownames(voronoidf[[i]]), cex = 1.5, pos = 1)
}

## Ground-truth ex/in/other labels for each feature set.
truth <- lapply(d1, '[[', 1)
## In-sample LDA predictions (element 1 of predict.lda output is `class`),
## then one confusion matrix per feature set.
pred <- lapply(lapply(lda.fit, predict), '[[', 1)
for (i in seq_along(truth)) {
  print(table(truth = truth[[i]], pred = pred[[i]]))
}
# pred
## Confusion matrices printed by the loop above (rows = truth,
## columns = LDA prediction), one per feature set F0..F5:
# truth ex in other
# ex 10 0 1
# in 1 5 0
# other 1 0 6
# pred
# truth ex in other
# ex 10 0 1
# in 1 5 0
# other 1 0 6
# pred
# truth ex in other
# ex 10 0 1
# in 1 5 0
# other 0 2 5
# pred
# truth ex in other
# ex 9 0 2
# in 1 5 0
# other 0 2 5
# pred
# truth ex in other
# ex 10 0 1
# in 1 5 0
# other 1 0 6
# pred
# truth ex in other
# ex 11 0 0
# in 0 6 0
# other 0 0 7
## Resubstitution (training-set) error rate of the LDA fit for each
## feature set; generalized from a hard-coded 1:6 to seq_along(truth).
lda.err <- list()
for (i in seq_along(truth)) {
  tr <- as.numeric(truth[[i]])
  pr <- as.numeric(pred[[i]])
  lda.err[[i]] <- mean(tr != pr)  # fraction misclassified
}
unlist(lda.err)
# [1] 0.1250000 0.1250000 0.1666667 0.2083333 0.1250000 0.0000000
The above gives the LDA resubstitution error rate for each of the six feature sets (F0–F5).